"""Gitlab helper."""
import argparse
import copy
import functools
import json
import os
import re
import typing
from urllib import parse

import gitlab
import gql
import gql.transport.requests
import graphql
import sentry_sdk

from cki_lib.logger import get_logger
from cki_lib.session import get_session

from . import misc
from . import yaml

# strftime()/strptime() format string: date and time separated by a literal "T",
# fractional seconds via %f and a literal trailing "Z",
# e.g. "2024-01-31T12:34:56.789000Z".
# NOTE(review): presumably the timestamp layout returned by the GitLab API - confirm.
GITLAB_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"

# Module-level logger obtained from the cki_lib logging helper.
LOGGER = get_logger('cki_lib.gitlab')
# Session object shared by the GraphQL transport and by get_instance().
# NOTE(review): 'status_forcelist' presumably makes the session retry requests
# answered with HTTP 500/504 - confirm against cki_lib.session.get_session().
SESSION = get_session(
    'cki_lib.gitlab',
    retry_args={
        'status_forcelist': [500, 504],
    },
)

# Default per_page value to use for python-gitlab list methods.
# https://python-gitlab.readthedocs.io/en/stable/api-usage.html#pagination
# A value of None means take the GL default (currently 20). Current API max is 100.
# The value is handed to gitlab.Gitlab(per_page=...) in get_instance().
PER_PAGE = None


class _GitLabTransport(gql.transport.requests.RequestsHTTPTransport):
    """RequestsHTTPTransport variant that talks through the shared cki_lib session.

    The session is assigned in the constructor, which is why connect() and
    close() are deliberately empty.
    """

    def __init__(self, url: str, token: str | None) -> None:
        """Point the transport at the /api/graphql endpoint of the instance hosting url.

        Parameters:
            url: GitLab instance/group/project URL; only scheme and network
                location are used to build the endpoint
            token: private token; when falsy, get_token(url) is consulted instead
        """
        split_url = parse.urlsplit(url)
        endpoint = f'{split_url.scheme}://{split_url.netloc}/api/graphql'
        super().__init__(endpoint)
        self.session = SESSION
        bearer_token = token or get_token(url)
        # Without any token, no Authorization header is configured at all.
        if bearer_token is not None:
            self.headers = {'Authorization': f'Bearer {bearer_token}'}

    def connect(self) -> None:
        """Do nothing, the session is already in place."""

    def close(self) -> None:
        """Do nothing, the shared session is left alone."""


class _GitLabClient(gql.Client):
    """GraphQL client bound to a single GitLab instance."""

    def __init__(self, url: str, token: str | None) -> None:
        """Set up the client for the GitLab instance hosting url.

        Only scheme and network location of url end up in self.url; the
        unmodified url and the token are handed to the transport.
        """
        split_url = parse.urlsplit(url)
        self.url = f'{split_url.scheme}://{split_url.netloc}'
        transport = _GitLabTransport(url, token)
        super().__init__(transport=transport)

    def query(
        self,
        query: str,
        variable_values: typing.Optional[typing.Dict[str, typing.Any]] = None,
        paged_key: typing.Optional[str] = None,
        **kwargs: typing.Any,
    ) -> typing.Dict[str, typing.Any]:
        """Run a GraphQL query, optionally collecting all pages of one connection.

        Without paged_key, the result of a single request is returned as is.

        With paged_key (a '/'-separated path into the result), requests are
        repeated for as long as pageInfo/hasNextPage at that location is
        truthy. The result of the first request is returned, with the
        'nodes'/'edges' lists of all later pages appended in place. For every
        follow-up request, the variable 'first' is set to False and 'after'
        to the previous pageInfo/endCursor, so the query needs to declare
        $first/$after and request pageInfo in the right place. Example:

            c = gitlab.get_graphql_client('https://gitlab.com')
            results = c.query('''
                query($first: Boolean = true, $after: String = "") {
                    project(fullPath: "group/project") {
                        id @include(if: $first)
                        mergeRequests(after: $after) {
                            nodes { iid }
                            pageInfo { hasNextPage endCursor }
                        }
                    }
                }
            ''', paged_key='project/mergeRequests')

        Parameters:
            query: GraphQL query string
            variable_values: query variables; the passed dict is never modified
            paged_key: location of the paged data inside the result
            kwargs: passed through to execute()
        """
        # Work on a private copy as the paging variables are written into it.
        request_variables = copy.deepcopy(variable_values or {})
        # The parsed document is cached, so it can be reused for every page.
        document = self._cache_gql(query)
        merged: typing.Dict[str, typing.Any] = {}
        while True:
            page: typing.Dict[str, typing.Any] = self.execute(
                document, variable_values=request_variables, **kwargs)
            if not paged_key:
                return page
            page_data = misc.get_nested_key(page, paged_key)
            if merged:
                # Later pages only contribute their list items.
                collected = misc.get_nested_key(merged, paged_key)
                for list_key in ('nodes', 'edges'):
                    if list_key in collected:
                        collected[list_key].extend(page_data[list_key])
            else:
                # The first page provides the skeleton of the final result.
                merged = page
            if not misc.get_nested_key(page_data, 'pageInfo/hasNextPage'):
                break
            cursor = misc.get_nested_key(page_data, 'pageInfo/endCursor')
            request_variables['first'] = False
            request_variables['after'] = cursor
            # https://gitlab.com/gitlab-org/gitlab/-/issues/476554
            # An empty cursor would request the first page again, so stop here.
            if not cursor:
                break
        return merged

    @staticmethod
    @functools.cache
    def _cache_gql(query: str) -> graphql.DocumentNode:
        """Parse a query string into a document, memoized per query string."""
        return gql.gql(query)


def get_graphql_client(url: str, token: str | None = None) -> _GitLabClient:
    """Create a GraphQL client for the GitLab instance hosting the given URL.

    Parameters:
        url: GitLab instance/group/project URL
        token: private token; if not given, it is looked up via get_token()
    """
    client = _GitLabClient(url, token)
    return client


def get_token(url, env_name=None):
    """Return a Gitlab API token.

    The tokens have to be provided as environment variables like this:
        export GITLAB_TOKENS='{
          "gitlab.com": "COM_GITLAB_TOKEN",
          "gitlab.com/some-group": "COM_GITLAB_GROUP_TOKEN",
          "gitlab.com/some-group/project": "COM_GITLAB_PROJECT_TOKEN",
        }'
        export COM_GITLAB_TOKEN='1234567890abcedf'
        export COM_GITLAB_GROUP_TOKEN='234567890abcedfg'
        export COM_GITLAB_PROJECT_TOKEN='34567890abcedfgh'

    A key matches if the URL, after its http:// or https:// scheme, starts
    with the key and the key ends on a URL component boundary, i.e. it is
    followed by the end of the URL or one of '/', ':', '?' and '#'. This way,
    "gitlab.com" matches neither "https://gitlab.com.example.org" nor
    "https://gitlab.community", and "gitlab.com/some-group" does not match
    "https://gitlab.com/some-group-other".

    If multiple tokens match the URL, the longest matching token is returned.

    Parameters:
        url: GitLab instance/group/project URL
        env_name: name of the environment variable with a JSON host=name dict
            containing the names of the environment variables with the tokens

    Returns:
        The value of the environment variable named by the longest matching
        key, or None if no key matches or that variable is not set.
    """
    # would be nice to get tokens from a '.config/cki/config'/CKI_CONFIG file
    # An unset or empty variable is treated like an empty mapping; the second
    # fallback covers a loader returning None for an empty document.
    raw_mapping = os.environ.get(env_name or 'GITLAB_TOKENS') or '{}'
    token_mapping = yaml.load(contents=raw_mapping) or {}

    def _matches(prefix):
        """Check whether url starts with prefix, ending on a component boundary."""
        for scheme in ('https', 'http'):
            candidate = f'{scheme}://{prefix}'
            if not url.startswith(candidate):
                continue
            remainder = url[len(candidate):]
            # Keys written with a trailing slash already end on a boundary.
            if not remainder or candidate.endswith('/') or remainder[0] in '/:?#':
                return True
        return False

    candidates = [(prefix, name) for prefix, name in token_mapping.items() if _matches(prefix)]
    if not candidates:
        return None
    # All candidates are prefixes of the same URL, so the longest one is unique.
    _, variable_name = max(candidates, key=lambda candidate: len(candidate[0]))
    return os.environ.get(variable_name)


def get_instance(url, token=None, env_name=None):
    """Create a python-gitlab API object for the instance hosting the given URL.

    The instance is addressed via scheme and network location of the URL, and
    uses the shared module session and the module-wide PER_PAGE default.

    Parameters:
        url: GitLab instance/group/project URL
        token: private GitLab API token; if not given, get_token() is used
        env_name: environment variable name to use for get_token()
    """
    split_url = parse.urlsplit(url)
    base_url = f'{split_url.scheme}://{split_url.netloc}'
    private_token = token or get_token(url, env_name)
    return gitlab.Gitlab(base_url, session=SESSION,
                         private_token=private_token, per_page=PER_PAGE)


def get_variables(gl_pipeline):
    """Collect the variables of a pipeline into a key -> value dict.

    The variables are read via gl_pipeline.variables.list(iterator=True); if a
    key shows up more than once, the last value wins.
    """
    variables = {}
    for variable in gl_pipeline.variables.list(iterator=True):
        variables[variable.key] = variable.value
    return variables


def parse_gitlab_url(url):
    """Parse a GitLab URL and return a Gitlab object.

    At the moment, the following URL formats are supported:

    Projects:
        https://gitlab.com/group/project
    Pipelines:
        https://gitlab.com/group/project/-/pipelines/1234
    Jobs:
        https://gitlab.com/group/project/-/jobs/1234
    MRs:
        https://gitlab.com/group/project/-/merge_requests/1234
    MR notes:
        https://gitlab.com/group/project/-/merge_requests/1234#note_1234567890
    Issues:
        https://gitlab.com/group/project/-/issues/1234
    Pipeline schedule:
        https://gitlab.com/group/project/-/pipeline_schedules/1234
    Groups:
        https://gitlab.com/groups/group

    Trailing slashes on project and group URLs are ignored.

    Args:
        url: Full GitLab URL.

    Returns:
        (gitlab.Gitlab, Object for the URL)

    Raises:
        ValueError: if the URL points into one of the supported project
            sections (e.g. /-/pipelines), but carries no numeric ID.
    """
    url_parts = parse.urlsplit(url)
    path = url_parts.path

    instance = get_instance(url)

    # Match with groups URL
    if match := re.match(r'/groups/(.*)', path):
        return instance, instance.groups.get(match[1].rstrip('/'))

    # URL path section -> name of the manager attribute on the project object.
    # The sections are checked in this order.
    project_sections = (
        ('pipeline_schedules', 'pipelineschedules'),
        ('pipelines', 'pipelines'),
        ('jobs', 'jobs'),
        ('merge_requests', 'mergerequests'),
        ('issues', 'issues'),
    )
    for section, manager_name in project_sections:
        if f'/-/{section}' not in path:
            continue
        match = re.match(rf'/(.*)/-/{section}/(\d+)', path)
        if match is None:
            # e.g. a list page such as /group/project/-/pipelines without an ID
            raise ValueError(f'Unable to find a {section} ID in GitLab URL {url}')
        # The project itself is not needed, so skip the extra API request.
        project = instance.projects.get(match[1], lazy=True)
        gl_object = getattr(project, manager_name).get(int(match[2]))

        # MR Notes are on the same URL as the Merge Request
        if section == 'merge_requests' and 'note_' in url_parts.fragment:
            note_id = url_parts.fragment.replace('note_', '')
            return instance, gl_object.notes.get(note_id)

        return instance, gl_object

    # Fallback to project
    return instance, instance.projects.get(path.strip('/'))


def main(argv: list[str] | None = None) -> str:
    """Access the GitLab GraphQL API.

    Parameters:
        argv: command line arguments; None is passed on to argparse as is

    Returns:
        The JSON-encoded result of the GraphQL query.
    """
    parser = argparse.ArgumentParser(description='Access the GitLab GraphQL API.')
    parser.add_argument('--gitlab-url', default=os.environ.get('GITLAB_URL'),
                        help='GitLab URL')
    parser.add_argument('--private-token',
                        help='GitLab access token, by default derived from GITLAB_TOKENS')
    parser.add_argument('--graphql-query', required=True,
                        help='GraphQL query')
    parser.add_argument('--variables', action=misc.StoreNameValuePair, nargs='*',
                        metavar='KEY=VALUE', help='JSON-formatted variable values')
    args = parser.parse_args(argv)

    client = get_graphql_client(args.gitlab_url, token=args.private_token)
    # --variables is optional and has no explicit default, so the attribute
    # can be None when the option is not given on the command line.
    variable_values = {
        name: yaml.load(contents=value)
        for name, value in (args.variables or {}).items()
    }
    return json.dumps(client.query(args.graphql_query, variable_values=variable_values))


if __name__ == '__main__':
    # Script entry point: hand the sentry_sdk module to the cki_lib helper
    # (presumably to set up error reporting - confirm in misc.sentry_init), then
    # print the JSON string returned by main() for the process arguments.
    misc.sentry_init(sentry_sdk)
    print(main())
